#Sourcecode for Elife paper Figure 3; A-C
#By Dr Charles Agoti
#Last updated 29 June 2022

#Clear workspace and load packages (file paths below are absolute; no working directory is set)
## The aim of this plot is to show the lineage temporal introduction patterns

rm(list=ls())
library(tidyverse)
library(janitor)
library(artyfarty)
library(lubridate)
library(scales)
library(patchwork) # supplies the `|` operator used to combine the panels into Figure 3

# Shared palette of 20 hex colours used by the fill scales of Fig 3B and 3C.
# NOTE(review): the last two entries repeat the two before them -- confirm
# that no two lineages end up sharing a colour in Fig 3C.
plot_color <- c(
  "#000000", "#C0C0C0", "#696969", "#FF0000", "#F2D2BD",
  "#800000", "#C9A9A6", "#00FF00", "#008000", "#00FFFF",
  "#BA6B57", "#FFA500", "#9933FF", "#A4C639", "#0000FF",
  "#CCCCFF", "#FF00FF", "#55ACEE", "#FF00FF", "#55ACEE"
)

# Names of the six coastal counties.
coast_counties <- c(
  "Mombasa", "Kilifi", "Kwale",
  "Taita Taveta", "Tana River", "Lamu"
)

# Six colours used for the county colour scale in Fig 3A.
color_coast <- c(
  "#FF0000", "#000000", "#00FFFF",
  "#FFA500", "#0000FF", "#FF00FF"
)

# First and last day of the x-axis window used in Fig 3C.
# The names `min`/`max` shadow the base functions as variables; they are kept
# because later code refers to them by these names.
min <- as.Date("2020-03-01")
max <- as.Date("2021-02-28")



# Temporal lineage patterns ----
# Load the county/lineage table and parse its two date columns
# (`datecollection` and `month`, both expected as YYYY-MM-DD text).
#
# The original pipeline was broken after the `month` step, leaving a dangling
# `mutate(Lineage = factor(Lineage))` with no data argument, which errors.
# Lineage is deliberately kept as character here: each figure builds its own
# factor levels where it needs them, and the `ifelse()` used for Fig 3C would
# return integer codes instead of lineage names if it were handed a factor.
county_lineage_dta <- read.csv(
  "~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Data/county_lineages_dta.csv"
) %>%
  mutate(
    datecollection = as.Date(datecollection, "%Y-%m-%d"),
    month = as.Date(month, "%Y-%m-%d"),
    Lineage = as.character(Lineage)
  )
  

# Figure 3A ----
# Bubble plot of lineage detections over time: one open circle per
# collection date x lineage x county, coloured by county and sized by the
# number of genomes in that combination.
Fig_3A <- county_lineage_dta %>%
  # Drop rows without a lineage. Blank strings are excluded as well as NA so
  # this panel uses the same rows as Fig 3B/3C, which filter on Lineage != "".
  filter(!is.na(Lineage), Lineage != "") %>%
  # Order the y axis by first appearance in the data.
  mutate(Lineage = factor(Lineage, levels = unique(Lineage))) %>%
  group_by(datecollection, Lineage, county_rep) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = datecollection, y = Lineage)) +
  geom_point(aes(color = county_rep, size = n), shape = 1) +
  labs(x = "Date in 2020/21", y = "") +
  # Dashed line separating the two waves. `size` is kept (rather than the
  # newer `linewidth`) so the script still runs on older ggplot2 releases.
  geom_vline(
    xintercept = as.Date("2020-09-16"),
    size = 0.5, linetype = "longdash", color = "black"
  ) +
  # annotate() draws each label exactly once. geom_text() with constant
  # x/y/label would draw one copy per data row, stacking the same text many
  # times in the PDF.
  annotate("text", x = as.Date("2020-06-15"), y = 43, label = "Wave 1", size = 3) +
  annotate("text", x = as.Date("2020-12-15"), y = 43, label = "Wave 2", size = 3) +
  scale_x_date(
    breaks = "2 month",
    date_minor_breaks = "1 month",
    labels = date_format("%b")
  ) +
  # Colours are unnamed, so they are assigned to counties in scale order.
  scale_color_manual(values = color_coast) +
  theme_grey() +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    strip.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    legend.position = "right",
    legend.key.size = unit(0.35, "cm"),
    legend.spacing.x = unit(0.20, "cm"),
    legend.spacing.y = unit(0.20, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    panel.spacing.x = unit(1.2, "lines"),
    legend.box.background = element_blank()
  ) +
  guides(
    color = guide_legend(ncol = 1, title = "County", title.position = "top"),
    size = guide_legend(ncol = 1, title = "Genome count", title.position = "top")
  )

# Write the panel to PDF, then also show it on the active device.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.3/Fig 3A.pdf", width = 5, height = 7.02)
print(Fig_3A)
dev.off()
Fig_3A

# Figure 3B ----
# Aim: show the lineage composition of each study phase.
# Horizontal bars give the number of sequences per lineage, faceted by
# `studyphase` and filled by whether the lineage is one of the four
# "Kenya specific" lineages or "International".
Fig_3B <- county_lineage_dta %>%
  filter(Lineage != "") %>%
  # Only this two-level classification is plotted. The finer
  # `lineage_classification` column computed previously was never used
  # downstream and has been removed.
  mutate(
    lineage_classi = if_else(
      Lineage %in% c("B.1.549", "B.1.530", "B.1.596.1", "N.8"),
      "Kenya specific",
      "International"
    )
  ) %>%
  # Order the lineage axis by first appearance in the data.
  mutate(Lineage = factor(Lineage, levels = unique(Lineage))) %>%
  group_by(studyphase, Lineage, lineage_classi) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = Lineage, y = n)) +
  geom_col(aes(fill = lineage_classi)) +
  # Named values pin each class to its colour (same colours as before:
  # plot_color[1] for International, plot_color[4] for Kenya specific).
  scale_fill_manual(
    values = c(
      "International" = plot_color[1],
      "Kenya specific" = plot_color[4]
    )
  ) +
  labs(x = "", y = "Number of sequences") +
  facet_wrap(studyphase ~ .) +
  scale_y_continuous(breaks = seq(0, 450, 150)) +
  theme_grey() +
  coord_flip() +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 11, angle = 0),
    strip.text.x = element_text(size = 11),
    axis.text.y = element_text(size = 11),
    plot.title = element_text(hjust = 0.5, size = 11, face = "bold"),
    legend.position = "bottom",
    legend.key.size = unit(0.20, "cm"),
    legend.spacing.x = unit(0.20, "cm"),
    legend.spacing.y = unit(0.20, "cm"),
    legend.text = element_text(size = 11),
    legend.title = element_text(size = 11),
    panel.spacing.x = unit(1.2, "lines"),
    legend.box.background = element_blank()
  ) +
  # The plot has no size aesthetic, so the former `size = T` entry was dropped.
  guides(fill = guide_legend(nrow = 2, title = "Lineage", title.position = "left"))

# Write the panel to PDF, then also show it on the active device.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.3/Fig.3B.pdf", width = 3, height = 7.02)
print(Fig_3B)
dev.off()
Fig_3B

# Figure 3C - data ----
# Aim of the plot: show the monthly Pango lineage distribution by county
# across the study period.
#
# `major_lineages`: lineages observed more than three times in the data.
# Blank lineage strings are excluded along with NA, matching the
# `Lineage != ""` filter used when Fig 3C itself is built.
major_lineages <- county_lineage_dta %>%
  filter(!is.na(Lineage), Lineage != "") %>%
  tabyl(Lineage) %>%
  filter(n > 3) %>%
  pull(Lineage) %>%
  as.character()

major_lineages
length(major_lineages)

# `major_lineages2`: the major lineages plus three lineages that are always
# shown as their own category regardless of count. union() avoids listing a
# lineage twice when it already passed the count threshold.
major_lineages2 <- union(major_lineages, c("B.1.1.7", "B.1.525", "P.1"))

major_lineages2

# Figure 3C ----
# Stacked monthly genome counts per county, filled by lineage. Lineages not
# listed in `major_lineages2` are pooled into "OtherCoastalKenya".
Fig_3C <- county_lineage_dta %>%
  filter(Lineage != "") %>%
  # as.character() guards the ifelse(): if Lineage were a factor, ifelse()
  # would return its integer codes rather than the lineage names.
  mutate(
    key_lineages = ifelse(
      Lineage %in% major_lineages2,
      as.character(Lineage),
      "OtherCoastalKenya"
    )
  ) %>%
  group_by(month, county_rep, key_lineages) %>%
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = month, y = n)) +
  geom_col(aes(fill = key_lineages)) +
  # Colours are unnamed, so they are assigned to lineages in scale order.
  scale_fill_manual(values = plot_color) +
  labs(y = "Monthly Number of genomes", x = "Month in 2020/21") +
  theme_scientific() +
  # NOTE(review): with `limits` set on the scale, a bar whose edge falls
  # outside [min, max] is removed by the default out-of-bounds handling --
  # confirm the first and last monthly bars are not being dropped.
  scale_x_date(
    breaks = "2 month",
    date_minor_breaks = "1 month",
    labels = date_format("%b"),
    limits = c(min, max)
  ) +
  # Dashed line separating the two waves. `size` is kept (rather than the
  # newer `linewidth`) so the script still runs on older ggplot2 releases.
  geom_vline(
    xintercept = as.Date("2020-09-16"),
    size = 0.5, linetype = "longdash", color = "black"
  ) +
  # One panel per county in two columns, each with its own y axis. The
  # facet_grid() call that used to precede this was overridden by
  # facet_wrap() and had no effect, so it has been removed.
  facet_wrap(county_rep ~ ., ncol = 2, scales = "free_y") +
  theme(
    axis.title.x = element_text(size = 11),
    axis.title.y = element_text(size = 11),
    axis.text.x = element_text(size = 9, angle = 0),
    strip.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
    legend.position = "bottom",
    legend.key.size = unit(0.25, "cm"),
    legend.spacing.x = unit(0.20, "cm"),
    legend.spacing.y = unit(0.20, "cm"),
    legend.text = element_text(size = 10),
    legend.title = element_text(size = 11),
    strip.background = element_rect(fill = "white", color = "white"),
    panel.spacing.x = unit(1.2, "lines"),
    legend.box.background = element_blank()
  ) +
  # The plot has no size aesthetic, so the former `size = T` entry was dropped.
  guides(fill = guide_legend(nrow = 5, title = "Lineage", title.position = "top"))

# Write the panel to PDF, then also show it on the active device.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.3/Fig.3C.pdf", width = 4.5, height = 7.02)
print(Fig_3C)
dev.off()
Fig_3C

# Combined Figure 3 ----
# Panels A, B and C side by side in a single row. The `|` operator between
# ggplot objects is defined by the patchwork package, so patchwork must be
# attached (library(patchwork)) before this line runs; without it `|` fails
# on ggplot objects.
Figure_3 <- Fig_3A | Fig_3B | Fig_3C

# Write the combined figure to PDF, then also show it on the active device.
# NOTE(review): the output file name is spelled "Fugure 3.pdf" -- probably a
# typo for "Figure 3.pdf". Left unchanged so existing outputs and references
# keep working; rename deliberately if desired.
pdf("~/Dropbox/COVID-19/SECONDWAVE/revisedcode/Figures/Fig.3/Fugure 3.pdf", width = 12, height = 8.5)
print(Figure_3)
dev.off()
Figure_3

